{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-07-16 15:54:42.586\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mbyzerllm.utils.connect_ray\u001b[0m:\u001b[36mconnect_cluster\u001b[0m:\u001b[36m37\u001b[0m - \u001b[1mJDK 21 will be used (/Users/allwefantasy/.auto-coder/jdk-21.0.2.jdk/Contents/Home)...\u001b[0m\n",
      "2024-07-16 15:54:42,665\tINFO worker.py:1564 -- Connecting to existing Ray cluster at address: 127.0.0.1:6379...\n",
      "2024-07-16 15:54:42,666\tINFO worker.py:1582 -- Calling ray.init() again after it has already been called.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "ExtractedData(qa_pairs=[])"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import byzerllm\n",
    "llm = byzerllm.ByzerLLM.from_default_model(\"deepseek_chat\")\n",
    "\n",
    "  \n",
    "@byzerllm.prompt()\n",
    "def _convert_pretrain_text_to_instruction(text: str)->str:\n",
    "    '''\n",
    "    根据提供的信息，生成多个相关的问题，这些问题的回答，最终要能覆盖里面所有的信息。\n",
    "\n",
    "    下面是一些信息：\n",
    "\n",
    "    {{text}}    \n",
    "    '''\n",
    "\n",
    "@byzerllm.prompt()\n",
    "def _format(text: str)->str:\n",
    "    '''\n",
    "    下面是一些问答信息：\n",
    "    \n",
    "    {{text}}\n",
    "\n",
    "\n",
    "    请将每个问题使用<_question_>标签包裹，每个回答使用<_answer_>标签包裹，最后\n",
    "    每个一组问题和回答使用<_group_>标签包裹。\n",
    "    '''\n",
    "\n",
    "from typing import List\n",
    "from pydantic import BaseModel\n",
    "\n",
    "class QAPair(BaseModel):\n",
    "    question: str\n",
    "    answer: str\n",
    "\n",
    "class ExtractedData(BaseModel):\n",
    "    qa_pairs: List[QAPair]\n",
    "\n",
    "  \n",
    "# v = _convert_pretrain_text_to_instruction.with_llm(llm).run(\"祝海林的生日是2月1号\")\n",
    "# v2 = _format.with_llm(llm).run(v)\n",
    "# result = extract_data(v2)\n",
    "# print(result)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-07-26 12:34:22.656\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mbyzerllm.utils.connect_ray\u001b[0m:\u001b[36mconnect_cluster\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mJDK 21 will be used (/Users/allwefantasy/.auto-coder/jdk-21.0.2.jdk/Contents/Home)...\u001b[0m\n",
      "2024-07-26 12:34:22,713\tINFO worker.py:1564 -- Connecting to existing Ray cluster at address: 127.0.0.1:6379...\n",
      "2024-07-26 12:34:22,714\tINFO worker.py:1582 -- Calling ray.init() again after it has already been called.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'这张图片展示了许多可爱的猫咪，它们的脸部特写呈现出各种不同的表情和颜色，背景是柔和的色调，配有一些红色的花朵装饰。整体风格非常温馨和艺术化。'"
      ]
     },
     "execution_count": 2,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import byzerllm\n",
    "import json\n",
    "\n",
    "vl_llm = byzerllm.ByzerLLM.from_default_model(\"gpt4o_mini_chat\")\n",
    "image = byzerllm.Image.load_image_from_path(\n",
    "    \"/Users/allwefantasy/projects/byzer-llm/images/cat1.png\"\n",
    ")\n",
    "v = vl_llm.chat_oai(\n",
    "    conversations=[\n",
    "        {\n",
    "            \"role\": \"user\",\n",
    "            \"content\": json.dumps(\n",
    "                [{\"image\": image, \"text\": \"这个图片里有什么？\"}], ensure_ascii=False\n",
    "            ),\n",
    "        }\n",
    "    ]\n",
    ")\n",
    "v[0].output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "\u001b[32m2024-07-26 12:37:32.451\u001b[0m | \u001b[1mINFO    \u001b[0m | \u001b[36mbyzerllm.utils.connect_ray\u001b[0m:\u001b[36mconnect_cluster\u001b[0m:\u001b[36m48\u001b[0m - \u001b[1mJDK 21 will be used (/Users/allwefantasy/.auto-coder/jdk-21.0.2.jdk/Contents/Home)...\u001b[0m\n",
      "2024-07-26 12:37:32,504\tINFO worker.py:1564 -- Connecting to existing Ray cluster at address: 127.0.0.1:6379...\n",
      "2024-07-26 12:37:32,505\tINFO worker.py:1582 -- Calling ray.init() again after it has already been called.\n"
     ]
    },
    {
     "data": {
      "text/plain": [
       "'这张图片展示了一群可爱的猫咪，它们的脸部特征非常明显，表情各异。背景是简单的颜色，猫咪的周围还有一些红色的花朵，增添了画面的生动感。整体风格看起来很温馨和艺术化。'"
      ]
     },
     "execution_count": 3,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "import byzerllm\n",
    "import json\n",
    "\n",
    "vl_llm = byzerllm.ByzerLLM.from_default_model(\"gpt4o_mini_chat\")\n",
    "image = byzerllm.Image.load_image_from_path(\n",
    "    \"/Users/allwefantasy/projects/byzer-llm/images/cat1.png\"\n",
    ")\n",
    "v = vl_llm.chat_oai(\n",
    "    conversations=[\n",
    "        {\n",
    "            \"role\": \"user\",\n",
    "            \"content\": json.dumps(\n",
    "                [\n",
    "                    {\n",
    "                        \"type\": \"image_url\",\n",
    "                        \"image_url\": {\"url\": image, \"detail\": \"high\"},\n",
    "                    },\n",
    "                    {\"text\": \"这个图片里有什么？\", \"type\": \"text\"},\n",
    "                ],\n",
    "                ensure_ascii=False,\n",
    "            ),\n",
    "        }\n",
    "    ]\n",
    ")\n",
    "v[0].output"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "start_tag='<_ROOT_>' end_tag='</_ROOT_>' content=[Tag(start_tag='<_group_>', end_tag='</_group_>', content='', parent=Tag(start_tag='<_ROOT_>', end_tag='</_ROOT_>', content=[...], parent=None)), Tag(start_tag='<_group_>', end_tag='</_group_>', content='', parent=Tag(start_tag='<_ROOT_>', end_tag='</_ROOT_>', content=[...], parent=None)), Tag(start_tag='<_group_>', end_tag='</_group_>', content='', parent=Tag(start_tag='<_ROOT_>', end_tag='</_ROOT_>', content=[...], parent=None)), Tag(start_tag='<_group_>', end_tag='</_group_>', content='', parent=Tag(start_tag='<_ROOT_>', end_tag='</_ROOT_>', content=[...], parent=None)), Tag(start_tag='<_group_>', end_tag='</_group_>', content='', parent=Tag(start_tag='<_ROOT_>', end_tag='</_ROOT_>', content=[...], parent=None))] parent=None\n",
      "<_group_> \n",
      "<_group_> \n",
      "<_group_> \n",
      "<_group_> \n",
      "<_group_> \n"
     ]
    }
   ],
   "source": [
    "from byzerllm.apps.utils import TagExtractor\n",
    "\n",
    "# Example usage:\n",
    "\n",
    "sample_text = \"\"\"<_group_> <_question_>祝海林的生日是哪一天？</_question_> <_answer_>祝海林的生日是2月1号。</_answer_> </_group_>\n",
    "<_group_> <_question_>祝海林的生日在哪个季节？</_question_> <_answer_>祝海林的生日在冬季，因为2月是冬季的一部分。</_answer_> </_group_>\n",
    "<_group_> <_question_>祝海林的生日在2月的哪一天？</_question_> <_answer_>祝海林的生日在2月的第一天，即2月1号。</_answer_> </_group_>\n",
    "<_group_> <_question_>祝海林的生日是否在中国的春节期间？</_question_> <_answer_>这取决于具体年份的春节日期，但通常2月1号可能接近或就在春节期间。</_answer_> </_group_>\n",
    "<_group_> <_question_>祝海林的生日是否在公历的2月？</_question_> <_answer_>是的，祝海林的生日在公历的2月1号。</_answer_> </_group_>\"\"\"\n",
    "\n",
    "# Parse as list of dictionaries\n",
    "result_list = TagExtractor(sample_text).extract()\n",
    "print(result_list)\n",
    "for item in result_list.content:\n",
    "    item.parent = None\n",
    "    print(item.start_tag,item.content)\n",
    "    for item1 in item.content:\n",
    "        print(\"==\"+item1.start_tag)\n",
    "        item1.parent = None\n",
    "        print(item1.model_dump_json(indent=2))    "
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "byzerllm",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.10.11"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
